

<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Spacy-BIST Parser &mdash; NLP Architect by Intel® AI Lab 0.5.2 documentation</title>
  

  
  
  
  

  
  <script type="text/javascript" src="_static/js/modernizr.min.js"></script>
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
        <script type="text/javascript" src="_static/jquery.js"></script>
        <script type="text/javascript" src="_static/underscore.js"></script>
        <script type="text/javascript" src="_static/doctools.js"></script>
        <script type="text/javascript" src="_static/language_data.js"></script>
        <script type="text/javascript" src="_static/install.js"></script>
        <script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
    
    <script type="text/javascript" src="_static/js/theme.js"></script>

    

  
  <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="_static/nlp_arch_theme.css" type="text/css" />
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto+Mono" type="text/css" />
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Open+Sans:100,900" type="text/css" />
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <a href="index.html">
          

          
            
            <img src="_static/logo.png" class="logo" alt="Logo"/>
          
          </a>

          

          
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <ul>
<li class="toctree-l1"><a class="reference internal" href="quick_start.html">Quick start</a></li>
<li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="publications.html">Publications</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorials.html">Jupyter Tutorials</a></li>
<li class="toctree-l1"><a class="reference internal" href="model_zoo.html">Model Zoo</a></li>
</ul>
<p class="caption"><span class="caption-text">NLP/NLU Models</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="tagging/sequence_tagging.html">Sequence Tagging</a></li>
<li class="toctree-l1"><a class="reference internal" href="sentiment.html">Sentiment Analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="bist_parser.html">Dependency Parsing</a></li>
<li class="toctree-l1"><a class="reference internal" href="intent.html">Intent Extraction</a></li>
<li class="toctree-l1"><a class="reference internal" href="lm.html">Language Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="information_extraction.html">Information Extraction</a></li>
<li class="toctree-l1"><a class="reference internal" href="transformers.html">Transformers</a></li>
<li class="toctree-l1"><a class="reference internal" href="archived/additional.html">Additional Models</a></li>
</ul>
<p class="caption"><span class="caption-text">Optimized Models</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="quantized_bert.html">Quantized BERT</a></li>
<li class="toctree-l1"><a class="reference internal" href="transformers_distillation.html">Transformers Distillation</a></li>
<li class="toctree-l1"><a class="reference internal" href="sparse_gnmt.html">Sparse Neural Machine Translation</a></li>
</ul>
<p class="caption"><span class="caption-text">Solutions</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="absa_solution.html">Aspect Based Sentiment Analysis</a></li>
<li class="toctree-l1"><a class="reference internal" href="term_set_expansion.html">Set Expansion</a></li>
<li class="toctree-l1"><a class="reference internal" href="trend_analysis.html">Trend Analysis</a></li>
</ul>
<p class="caption"><span class="caption-text">For Developers</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="generated_api/nlp_architect_api_index.html">nlp_architect API</a></li>
<li class="toctree-l1"><a class="reference internal" href="developer_guide.html">Developer Guide</a></li>
</ul>

            
          
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" aria-label="top navigation">
        
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="index.html">NLP Architect by Intel® AI Lab</a>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
    
      <li><a href="index.html">Docs</a> &raquo;</li>
        
      <li>Spacy-BIST Parser</li>
    
    
      <li class="wy-breadcrumbs-aside">
        
            
        
      </li>
    
  </ul>

  
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="spacy-bist-parser">
<h1>Spacy-BIST Parser<a class="headerlink" href="#spacy-bist-parser" title="Permalink to this headline">¶</a></h1>
<div class="section" id="raw-text-parser-based-on-spacy-and-bist-parsers">
<h2>Raw text parser based on Spacy and BIST parsers<a class="headerlink" href="#raw-text-parser-based-on-spacy-and-bist-parsers" title="Permalink to this headline">¶</a></h2>
<p>The parser uses <a class="reference external" href="https://spacy.io">spaCy</a>’s English model for sentence breaking,
tokenization and token annotations (part-of-speech, lemma, NER).
Dependency relations between tokens are extracted using BIST parser.
The BIST parser is described <a class="reference external" href="https://www.transacl.org/ojs/index.php/tacl/article/viewFile/885/198">here</a>,
and its code is documented <a class="reference internal" href="bist_parser.html"><span class="doc">here</span></a>.</p>
</div>
<div class="section" id="usage">
<h2>Usage<a class="headerlink" href="#usage" title="Permalink to this headline">¶</a></h2>
<p>To use the module, import it like so:</p>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">nlp_architect.pipelines.spacy_bist</span> <span class="kn">import</span> <span class="n">SpacyBISTParser</span>
</pre></div>
</div>
</div>
<div class="section" id="training">
<h2>Training<a class="headerlink" href="#training" title="Permalink to this headline">¶</a></h2>
<p>By default, the parser uses a pre-trained BIST model and spaCy’s English
model (<code class="docutils literal notranslate"><span class="pre">en</span></code>). A pre-trained BIST model is automatically
downloaded (on-demand) to <code class="docutils literal notranslate"><span class="pre">spacy_bist/bist-pretrained/</span></code> and then loaded
from that directory. To use other models, supply a path or link to each
model at initialization (see example below).</p>
<p>For instructions on how to train a BIST model, see <a class="reference internal" href="bist_parser.html"><span class="doc">BIST documentation</span></a>.
For instructions on how to get spaCy <a class="reference external" href="https://spacy.io/usage/models">models</a> or how to train a model, see <a class="reference external" href="https://spacy.io/usage/training">spaCy training instructions</a>.</p>
<div class="section" id="example">
<h3>Example<a class="headerlink" href="#example" title="Permalink to this headline">¶</a></h3>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">parser</span> <span class="o">=</span> <span class="n">SpacyBISTParser</span><span class="p">(</span><span class="n">spacy_model</span><span class="o">=</span><span class="s1">&#39;/path/or/link/to/spacy/model&#39;</span><span class="p">,</span> <span class="n">bist_model</span><span class="o">=</span><span class="s1">&#39;/path/to/bist/model&#39;</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="parsing">
<h2>Parsing<a class="headerlink" href="#parsing" title="Permalink to this headline">¶</a></h2>
<p>The parser accepts a document as a raw text string encoded in UTF-8 format and outputs a
<a class="reference internal" href="generated_api/nlp_architect.common.html#nlp_architect.common.core_nlp_doc.CoreNLPDoc" title="nlp_architect.common.core_nlp_doc.CoreNLPDoc"><code class="xref py py-class docutils literal notranslate"><span class="pre">CoreNLPDoc</span></code></a> instance which contains the annotations (example output below).</p>
<div class="section" id="id1">
<h3>Example<a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h3>
<div class="code python highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">parser</span> <span class="o">=</span> <span class="n">SpacyBISTParser</span><span class="p">()</span>
<span class="n">parsed_doc</span> <span class="o">=</span> <span class="n">parser</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">doc_text</span><span class="o">=</span><span class="s1">&#39;First sentence. Second sentence&#39;</span><span class="p">)</span>
<span class="nb">print</span><span class="p">(</span><span class="n">parsed_doc</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="output">
<h3>Output<a class="headerlink" href="#output" title="Permalink to this headline">¶</a></h3>
<div class="code json highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span>
    <span class="s2">&quot;doc_text&quot;</span><span class="p">:</span> <span class="s2">&quot;First sentence. Second sentence&quot;</span><span class="p">,</span>
    <span class="s2">&quot;sentences&quot;</span><span class="p">:</span> <span class="p">[</span>
        <span class="p">[</span>
            <span class="p">{</span>
                <span class="s2">&quot;start&quot;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span>
                <span class="s2">&quot;len&quot;</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span>
                <span class="s2">&quot;pos&quot;</span><span class="p">:</span> <span class="s2">&quot;JJ&quot;</span><span class="p">,</span>
                <span class="s2">&quot;ner&quot;</span><span class="p">:</span> <span class="s2">&quot;ORDINAL&quot;</span><span class="p">,</span>
                <span class="s2">&quot;lemma&quot;</span><span class="p">:</span> <span class="s2">&quot;first&quot;</span><span class="p">,</span>
                <span class="s2">&quot;gov&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
                <span class="s2">&quot;rel&quot;</span><span class="p">:</span> <span class="s2">&quot;amod&quot;</span><span class="p">,</span>
                <span class="s2">&quot;text&quot;</span><span class="p">:</span> <span class="s2">&quot;First&quot;</span>
            <span class="p">},</span>
            <span class="p">{</span>
                <span class="s2">&quot;start&quot;</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span>
                <span class="s2">&quot;len&quot;</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span>
                <span class="s2">&quot;pos&quot;</span><span class="p">:</span> <span class="s2">&quot;NN&quot;</span><span class="p">,</span>
                <span class="s2">&quot;ner&quot;</span><span class="p">:</span> <span class="s2">&quot;&quot;</span><span class="p">,</span>
                <span class="s2">&quot;lemma&quot;</span><span class="p">:</span> <span class="s2">&quot;sentence&quot;</span><span class="p">,</span>
                <span class="s2">&quot;gov&quot;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
                <span class="s2">&quot;rel&quot;</span><span class="p">:</span> <span class="s2">&quot;root&quot;</span><span class="p">,</span>
                <span class="s2">&quot;text&quot;</span><span class="p">:</span> <span class="s2">&quot;sentence&quot;</span>
            <span class="p">},</span>
            <span class="p">{</span>
                <span class="s2">&quot;start&quot;</span><span class="p">:</span> <span class="mi">14</span><span class="p">,</span>
                <span class="s2">&quot;len&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
                <span class="s2">&quot;pos&quot;</span><span class="p">:</span> <span class="s2">&quot;.&quot;</span><span class="p">,</span>
                <span class="s2">&quot;ner&quot;</span><span class="p">:</span> <span class="s2">&quot;&quot;</span><span class="p">,</span>
                <span class="s2">&quot;lemma&quot;</span><span class="p">:</span> <span class="s2">&quot;.&quot;</span><span class="p">,</span>
                <span class="s2">&quot;gov&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
                <span class="s2">&quot;rel&quot;</span><span class="p">:</span> <span class="s2">&quot;punct&quot;</span><span class="p">,</span>
                <span class="s2">&quot;text&quot;</span><span class="p">:</span> <span class="s2">&quot;.&quot;</span>
            <span class="p">}</span>
        <span class="p">],</span>
        <span class="p">[</span>
            <span class="p">{</span>
                <span class="s2">&quot;start&quot;</span><span class="p">:</span> <span class="mi">16</span><span class="p">,</span>
                <span class="s2">&quot;len&quot;</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span>
                <span class="s2">&quot;pos&quot;</span><span class="p">:</span> <span class="s2">&quot;JJ&quot;</span><span class="p">,</span>
                <span class="s2">&quot;ner&quot;</span><span class="p">:</span> <span class="s2">&quot;ORDINAL&quot;</span><span class="p">,</span>
                <span class="s2">&quot;lemma&quot;</span><span class="p">:</span> <span class="s2">&quot;second&quot;</span><span class="p">,</span>
                <span class="s2">&quot;gov&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
                <span class="s2">&quot;rel&quot;</span><span class="p">:</span> <span class="s2">&quot;amod&quot;</span><span class="p">,</span>
                <span class="s2">&quot;text&quot;</span><span class="p">:</span> <span class="s2">&quot;Second&quot;</span>
            <span class="p">},</span>
            <span class="p">{</span>
                <span class="s2">&quot;start&quot;</span><span class="p">:</span> <span class="mi">23</span><span class="p">,</span>
                <span class="s2">&quot;len&quot;</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span>
                <span class="s2">&quot;pos&quot;</span><span class="p">:</span> <span class="s2">&quot;NN&quot;</span><span class="p">,</span>
                <span class="s2">&quot;ner&quot;</span><span class="p">:</span> <span class="s2">&quot;&quot;</span><span class="p">,</span>
                <span class="s2">&quot;lemma&quot;</span><span class="p">:</span> <span class="s2">&quot;sentence&quot;</span><span class="p">,</span>
                <span class="s2">&quot;gov&quot;</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
                <span class="s2">&quot;rel&quot;</span><span class="p">:</span> <span class="s2">&quot;root&quot;</span><span class="p">,</span>
                <span class="s2">&quot;text&quot;</span><span class="p">:</span> <span class="s2">&quot;sentence&quot;</span>
            <span class="p">}</span>
        <span class="p">]</span>
    <span class="p">]</span>
<span class="p">}</span>
</pre></div>
</div>
</div>
</div>
<div class="section" id="references">
<h2>References<a class="headerlink" href="#references" title="Permalink to this headline">¶</a></h2>
<table class="docutils footnote" frame="void" id="id2" rules="none">
<colgroup><col class="label" /><col /></colgroup>
<tbody valign="top">
<tr><td class="label">[1]</td><td>Kiperwasser, E., &amp; Goldberg, Y. (2016). Simple and Accurate Dependency Parsing Using Bidirectional LSTM Feature Representations. Transactions Of The Association For Computational Linguistics, 4, 313-327. <a class="reference external" href="https://transacl.org/ojs/index.php/tacl/article/view/885/198">https://transacl.org/ojs/index.php/tacl/article/view/885/198</a></td></tr>
</tbody>
</table>
</div>
</div>


           </div>
           
          </div>
          <footer>
  

  <hr/>

  <div role="contentinfo">
    <p>

    </p>
  </div>
  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.

</footer>

        </div>
      </div>

    </section>

  </div>
  


  <script type="text/javascript">
      // Passing a function to jQuery() registers it as a DOM-ready callback,
      // so the theme navigation is switched on only after the page markup
      // has been parsed.
      // NOTE(review): the `true` argument presumably turns on the sticky
      // sidebar behaviour; confirm against _static/js/theme.js.
      jQuery(function enableThemeNavigation() {
          var themeNavigation = SphinxRtdTheme.Navigation;
          themeNavigation.enable(true);
      });
  </script>

  
  
    
   

</body>
</html>